library(bupaverse)
## Warning: package 'bupaverse' was built under R version 4.2.3
##
## .______ __ __ .______ ___ ____ ____ _______ .______ _______. _______
## | _ \ | | | | | _ \ / \ \ \ / / | ____|| _ \ / || ____|
## | |_) | | | | | | |_) | / ^ \ \ \/ / | |__ | |_) | | (----`| |__
## | _ < | | | | | ___/ / /_\ \ \ / | __| | / \ \ | __|
## | |_) | | `--' | | | / _____ \ \ / | |____ | |\ \----.----) | | |____
## |______/ \______/ | _| /__/ \__\ \__/ |_______|| _| `._____|_______/ |_______|
##
## ── Attaching packages ─────────────────────────────────────── bupaverse 0.1.0 ──
## ✔ bupaR 0.5.3 ✔ processcheckR 0.1.4
## ✔ edeaR 0.9.4 ✔ processmapR 0.5.3
## ✔ eventdataR 0.3.1
## Warning: package 'bupaR' was built under R version 4.2.3
## Warning: package 'processcheckR' was built under R version 4.2.3
## ── Conflicts ────────────────────────────────────────── bupaverse_conflicts() ──
## ✖ bupaR::filter() masks stats::filter()
## ✖ processmapR::frequency() masks stats::frequency()
## ✖ edeaR::setdiff() masks base::setdiff()
## ✖ bupaR::timestamp() masks utils::timestamp()
## ✖ processcheckR::xor() masks base::xor()
library(daqapo)
## Warning: package 'daqapo' was built under R version 4.2.3
##
## Attaching package: 'daqapo'
##
## The following object is masked from 'package:eventdataR':
##
## hospital
##
## The following object is masked from 'package:utils':
##
## fix
# event_log <- xesreadR::read_xes("../2_to_xes/mimicel.xes", validate = FALSE)
Importing the XES file into bupaR fails with the error message
shown below. Hence, we use the CSV file to assess the data
quality of the event log instead.
Error: cannot allocate vector of size 1.6 Gb
10. id(list(col_id, row_id), drop = FALSE)
9. spread.data.frame(., type, value)
8. spread(., type, value)
7. select(., -attr_id)
6. list2(...)
5. bind_cols(., eventlog)
4. select(., -n_attributes, -attr_id)
3. spread(., key, value)
2. all_attrs %>% unlist() %>% as_data_frame() %>% mutate(type = rep(c("key",
"value"), length = nrow(.)), attr_id = rep(1:(nrow(.)/2),
each = 2)) %>% spread(type, value) %>% select(-attr_id) %>%
bind_cols(eventlog) %>% select(-n_attributes, -attr_id) %>% ...
1. xesreadR::read_xes("../2_to_xes/mimicel.xes", validate = FALSE)
Import the event log from CSV
# Load the raw event data from CSV; empty and single-space fields become NA.
eventlog_df <- read.csv(
  file = "../2_to_xes/mimicel.csv",
  sep = ",",
  na.strings = c("", " ")
)
Convert the data frame to an event log and an activity log, adding activity_instance_id and lifecycle_id
# Build the bupaR event log from the raw data frame:
#   * parse the `timestamps` column,
#   * add the columns the event log mapping needs: no resource information
#     (NA), a fixed "complete" lifecycle, and the numeric row name as the
#     activity instance id,
#   * declare which column plays which role.
event_log <- eventlog_df %>%
  bupaR::convert_timestamps(columns = "timestamps", format = ymd_hms) %>%
  bupaR::mutate(
    resource_id = NA,
    lifecycle_id = "complete",
    activity_instance_id = as.numeric(row.names(.))
  ) %>%
  bupaR::eventlog(
    case_id = "stay_id",
    activity_id = "activity",
    activity_instance_id = "activity_instance_id",
    timestamp = "timestamps",
    lifecycle_id = "lifecycle_id",
    resource_id = "resource_id"
  )
# The `daqapo` data quality checks work on an `activitylog`, so derive one
# from the event log.
activity_log <- bupaR::to_activitylog(event_log)
## Warning in to_activitylog.eventlog(.): No start events were found. Creating and
## initialising 'start' to NA.
event_log
Show identifiers for event_log
# Print which column is mapped to which event log role.
bupaR::mapping(event_log)
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Activity instance identifier: activity_instance_id
## Timestamp: timestamps
## Lifecycle transition: lifecycle_id
Show the number of activities, events, cases, and traces in event_log
# Number of distinct activities.
bupaR::n_activities(event_log)
## [1] 6
# Number of events.
bupaR::n_events(event_log)
## [1] 7568824
# Number of cases.
bupaR::n_cases(event_log)
## [1] 425028
# Number of distinct traces.
bupaR::n_traces(event_log)
## [1] 175671
Show unique activities
# Frequency table of the activities in the log.
bupaR::activities(event_log)
## # A tibble: 6 × 3
## activity absolute_frequency relative_frequency
## <chr> <int> <dbl>
## 1 Medicine reconciliation 2953118 0.390
## 2 Medicine dispensations 1441839 0.190
## 3 Vital sign check 1423734 0.188
## 4 Discharge from the ED 900077 0.119
## 5 Enter the ED 425028 0.0562
## 6 Triage in the ED 425028 0.0562
Show unique traces
# Frequency table of the traces in the log.
bupaR::traces(event_log)
## # A tibble: 175,671 × 3
## trace absolute_frequency relative_frequency
## <chr> <int> <dbl>
## 1 Enter the ED,Triage in the ED,Vital si… 7108 0.0167
## 2 Enter the ED,Triage in the ED,Discharg… 4994 0.0117
## 3 Enter the ED,Triage in the ED,Vital si… 4244 0.00999
## 4 Enter the ED,Triage in the ED,Vital si… 3937 0.00926
## 5 Enter the ED,Triage in the ED,Vital si… 3216 0.00757
## 6 Enter the ED,Triage in the ED,Discharg… 2129 0.00501
## 7 Enter the ED,Triage in the ED,Vital si… 1696 0.00399
## 8 Enter the ED,Triage in the ED,Vital si… 1515 0.00356
## 9 Enter the ED,Triage in the ED,Vital si… 1431 0.00337
## 10 Enter the ED,Triage in the ED,Medicine… 1349 0.00317
## # ℹ 175,661 more rows
The table below summarizes the different data quality assessment
tests available in daqapo, after which each test will be
briefly demonstrated.
| Function name | Description | Output |
|---|---|---|
| detect_activity_frequency_violations | Function that detects activity frequency anomalies per case | Summary in console + Returns activities in cases which are executed too many times |
| detect_attribute_dependencies | Function detecting violations of dependencies between attributes (i.e. condition(s) that should hold when (an)other condition(s) hold(s)) | Summary in console + Returns rows with dependency violations |
| detect_missing_values | Function detecting missing values at different levels of aggregation | Summary in console + Returns rows with NAs |
| detect_multiregistration | Function detecting the registration of a series of events in a short time period for the same case or by the same resource | Summary in console + Returns rows with multiregistration on resource or case level |
| detect_unique_values | Function listing all distinct combinations of the given log attributes | Summary in console + Returns all unique combinations of values in given columns |
| detect_value_range_violations | Function detecting violations of the range of acceptable values | Summary in console + Returns rows with value range infringements |
# Flag cases in which "Enter the ED" or "Triage in the ED" is executed more
# often than the given threshold of 1.
daqapo::detect_activity_frequency_violations(
  activity_log,
  "Enter the ED" = 1,
  "Triage in the ED" = 1
)
## *** OUTPUT ***
## For 0 cases in the activity log (0%) an anomaly is detected.
Detect cases with disposition == "HOME" whose
hadm_id is NA (these are the rows for which the checked statement holds)
# Keep only "Discharge from the ED" rows whose seq_num is missing or 1, then
# check the rule: disposition == "HOME"  =>  hadm_id is NA.
daqapo::detect_attribute_dependencies(
  bupaR::filter(
    activity_log,
    activity == "Discharge from the ED" & (is.na(seq_num) | seq_num == 1)
  ),
  antecedent = (disposition == "HOME"),
  consequent = is.na(hadm_id)
)
## *** OUTPUT ***
## The following statement was checked: if condition(s) ~(disposition == "HOME") hold(s), then ~is.na(hadm_id) should also hold.
## This statement holds for 205129 (84.9%) of the rows in the activity log for which the first condition(s) hold and does not hold for 36497 (15.1%) of these rows.
## For the following rows, the first condition(s) hold(s), but the second condition does not:
## # Log of 241622 events consisting of:
## 1 trace
## 241622 cases
## 241622 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 241,622 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000204 11615015 25540031 Discharge from t… <NA> <NA> <NA>
## 2 30000252 18684072 28532292 Discharge from t… <NA> <NA> <NA>
## 3 30000254 11158447 NA Discharge from t… <NA> <NA> <NA>
## 4 30000262 19454512 NA Discharge from t… <NA> <NA> <NA>
## 5 30000291 11212357 NA Discharge from t… <NA> <NA> <NA>
## 6 30000389 11928692 NA Discharge from t… <NA> <NA> <NA>
## 7 30000417 10275184 NA Discharge from t… <NA> <NA> <NA>
## 8 30000443 12356587 NA Discharge from t… <NA> <NA> <NA>
## 9 30000448 15135064 NA Discharge from t… <NA> <NA> <NA>
## 10 30000479 19039924 NA Discharge from t… <NA> <NA> <NA>
## # ℹ 241,612 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect cases with disposition == "ADMITTED" whose
hadm_id is NA (these are the rows for which the checked statement holds)
# Keep only "Discharge from the ED" rows whose seq_num is missing or 1, then
# check the rule: disposition == "ADMITTED"  =>  hadm_id is NA.
daqapo::detect_attribute_dependencies(
  bupaR::filter(
    activity_log,
    activity == "Discharge from the ED",
    is.na(seq_num) | seq_num == 1
  ),
  antecedent = (disposition == "ADMITTED"),
  consequent = is.na(hadm_id)
)
## *** OUTPUT ***
## The following statement was checked: if condition(s) ~(disposition == "ADMITTED") hold(s), then ~is.na(hadm_id) should also hold.
## This statement holds for 384 (0.24%) of the rows in the activity log for which the first condition(s) hold and does not hold for 157626 (99.76%) of these rows.
## For the following rows, the first condition(s) hold(s), but the second condition does not:
## # Log of 158010 events consisting of:
## 1 trace
## 158010 cases
## 158010 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 158,010 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000012 11714491 21562392 Discharge from t… <NA> <NA> <NA>
## 2 30000038 13821532 26255538 Discharge from t… <NA> <NA> <NA>
## 3 30000039 13340997 23100190 Discharge from t… <NA> <NA> <NA>
## 4 30000177 17937834 23831044 Discharge from t… <NA> <NA> <NA>
## 5 30000275 13297743 26874680 Discharge from t… <NA> <NA> <NA>
## 6 30000317 13658097 23069398 Discharge from t… <NA> <NA> <NA>
## 7 30000368 18563034 29198602 Discharge from t… <NA> <NA> <NA>
## 8 30000379 15293245 21532833 Discharge from t… <NA> <NA> <NA>
## 9 30000426 16592013 26871835 Discharge from t… <NA> <NA> <NA>
## 10 30000492 15071337 27867822 Discharge from t… <NA> <NA> <NA>
## # ℹ 158,000 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Overview of missing values for each column
# Count missing values per column over the whole activity log.
daqapo::detect_missing_values(
  activity_log,
  level_of_aggregation = "overview"
)
## Selected level of aggregation:overview
## *** OUTPUT ***
## Absolute number of missing values per column:
##
## stay_id 0
## subject_id 0
## hadm_id 3015236
## activity 0
## gender 7143796
## race 7143796
## arrival_transport 7143796
## disposition 6668747
## seq_num 6669845
## icd_code 6669845
## icd_version 6669845
## icd_title 6669845
## temperature 6259715
## heartrate 5801023
## resprate 5822404
## o2sat 5864053
## sbp 5812828
## dbp 5813628
## pain 6131510
## acuity 7150772
## chiefcomplaint 7143816
## rhythm 7515113
## name 3173867
## gsn 3206271
## ndc 4615706
## etc_rn 4615706
## etccode 4627281
## etcdescription 4627281
## med_rn 6126985
## gsn_rn 6126985
## resource_id 7568824
## activity_instance_id 0
## .order 0
## complete 0
## start 7568824
## Relative number of missing values per column (expressed as percentage):
##
## stay_id 0.00000
## subject_id 0.00000
## hadm_id 39.83758
## activity 0.00000
## gender 94.38449
## race 94.38449
## arrival_transport 94.38449
## disposition 88.10810
## seq_num 88.12261
## icd_code 88.12261
## icd_version 88.12261
## icd_title 88.12261
## temperature 82.70393
## heartrate 76.64365
## resprate 76.92614
## o2sat 77.47641
## sbp 76.79962
## dbp 76.81019
## pain 81.01008
## acuity 94.47666
## chiefcomplaint 94.38476
## rhythm 99.29037
## name 41.93342
## gsn 42.36155
## ndc 60.98313
## etc_rn 60.98313
## etccode 61.13606
## etcdescription 61.13606
## med_rn 80.95029
## gsn_rn 80.95029
## resource_id 100.00000
## activity_instance_id 0.00000
## .order 0.00000
## complete 0.00000
## start 100.00000
## Overview of activity log rows which are incomplete:
## # Log of 7568824 events consisting of:
## 425028 cases
## 7568824 instances of 6 activities
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 7,568,824 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000012 11714491 21562392 Vital sign check <NA> <NA> <NA>
## 2 30000012 11714491 21562392 Enter the ED F WHITE AMBULANCE
## 3 30000012 11714491 21562392 Triage in the ED <NA> <NA> <NA>
## 4 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 5 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 6 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 7 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 8 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 9 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 10 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## # ℹ 7,568,814 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect missing values for gender
# Check `gender` for missing values on the "Enter the ED" rows only.
daqapo::detect_missing_values(
  bupaR::filter(activity_log, activity == "Enter the ED"),
  level_of_aggregation = "column",
  column = "gender"
)
## Selected level of aggregation:column
## *** OUTPUT ***
## Absolute number of missing values in columngender:0
## Relative number of missing values in columngender(expressed as percentage):0
##
## Overview of activity log rows in whichgenderis missing:
## EMPTY EVENT LOG
## # A tibble: 0 × 35
## # ℹ 35 variables: stay_id <int>, subject_id <int>, hadm_id <int>,
## # activity <chr>, gender <chr>, race <chr>, arrival_transport <chr>,
## # disposition <chr>, seq_num <int>, icd_code <chr>, icd_version <int>,
## # icd_title <chr>, temperature <dbl>, heartrate <dbl>, resprate <dbl>,
## # o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>, acuity <dbl>,
## # chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>, ndc <dbl>,
## # etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>, …
Detect missing values for race
# Check `race` for missing values on the "Enter the ED" rows only.
daqapo::detect_missing_values(
  bupaR::filter(activity_log, activity == "Enter the ED"),
  level_of_aggregation = "column",
  column = "race"
)
## Selected level of aggregation:column
## *** OUTPUT ***
## Absolute number of missing values in columnrace:0
## Relative number of missing values in columnrace(expressed as percentage):0
##
## Overview of activity log rows in whichraceis missing:
## EMPTY EVENT LOG
## # A tibble: 0 × 35
## # ℹ 35 variables: stay_id <int>, subject_id <int>, hadm_id <int>,
## # activity <chr>, gender <chr>, race <chr>, arrival_transport <chr>,
## # disposition <chr>, seq_num <int>, icd_code <chr>, icd_version <int>,
## # icd_title <chr>, temperature <dbl>, heartrate <dbl>, resprate <dbl>,
## # o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>, acuity <dbl>,
## # chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>, ndc <dbl>,
## # etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>, …
Detect missing values for arrival_transport
# Check `arrival_transport` for missing values on the "Enter the ED" rows only.
daqapo::detect_missing_values(
  bupaR::filter(activity_log, activity == "Enter the ED"),
  level_of_aggregation = "column",
  column = "arrival_transport"
)
## Selected level of aggregation:column
## *** OUTPUT ***
## Absolute number of missing values in columnarrival_transport:0
## Relative number of missing values in columnarrival_transport(expressed as percentage):0
##
## Overview of activity log rows in whicharrival_transportis missing:
## EMPTY EVENT LOG
## # A tibble: 0 × 35
## # ℹ 35 variables: stay_id <int>, subject_id <int>, hadm_id <int>,
## # activity <chr>, gender <chr>, race <chr>, arrival_transport <chr>,
## # disposition <chr>, seq_num <int>, icd_code <chr>, icd_version <int>,
## # icd_title <chr>, temperature <dbl>, heartrate <dbl>, resprate <dbl>,
## # o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>, acuity <dbl>,
## # chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>, ndc <dbl>,
## # etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>, …
Detect missing values for disposition
# Check `disposition` for missing values on the "Discharge from the ED" rows
# only.
daqapo::detect_missing_values(
  bupaR::filter(activity_log, activity == "Discharge from the ED"),
  level_of_aggregation = "column",
  column = "disposition"
)
## Selected level of aggregation:column
## *** OUTPUT ***
## Absolute number of missing values in columndisposition:0
## Relative number of missing values in columndisposition(expressed as percentage):0
##
## Overview of activity log rows in whichdispositionis missing:
## EMPTY EVENT LOG
## # A tibble: 0 × 35
## # ℹ 35 variables: stay_id <int>, subject_id <int>, hadm_id <int>,
## # activity <chr>, gender <chr>, race <chr>, arrival_transport <chr>,
## # disposition <chr>, seq_num <int>, icd_code <chr>, icd_version <int>,
## # icd_title <chr>, temperature <dbl>, heartrate <dbl>, resprate <dbl>,
## # o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>, acuity <dbl>,
## # chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>, ndc <dbl>,
## # etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>, …
Detect missing values for acuity
# Check `acuity` for missing values on the "Triage in the ED" rows only.
daqapo::detect_missing_values(
  bupaR::filter(activity_log, activity == "Triage in the ED"),
  level_of_aggregation = "column",
  column = "acuity"
)
## Selected level of aggregation:column
## *** OUTPUT ***
## Absolute number of missing values in columnacuity:6976
## Relative number of missing values in columnacuity(expressed as percentage):1.64130363176073
##
## Overview of activity log rows in whichacuityis missing:
## # Log of 6976 events consisting of:
## 1 trace
## 6976 cases
## 6976 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 6,976 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30001785 16061348 28146972 Triage in the ED <NA> <NA> <NA>
## 2 30003428 11889374 25084216 Triage in the ED <NA> <NA> <NA>
## 3 30003505 11229262 NA Triage in the ED <NA> <NA> <NA>
## 4 30003941 14334804 NA Triage in the ED <NA> <NA> <NA>
## 5 30004017 13419676 29317041 Triage in the ED <NA> <NA> <NA>
## 6 30004518 17237928 26689098 Triage in the ED <NA> <NA> <NA>
## 7 30006274 16169853 29415170 Triage in the ED <NA> <NA> <NA>
## 8 30007594 10554696 24910876 Triage in the ED <NA> <NA> <NA>
## 9 30008125 18064328 22704256 Triage in the ED <NA> <NA> <NA>
## 10 30011041 12155635 27459698 Triage in the ED <NA> <NA> <NA>
## # ℹ 6,966 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect Multiregistration for activity
Medicine reconciliation
# Look for "Medicine reconciliation" rows of the same case whose `complete`
# timestamps fall within the 61-second threshold.
daqapo::detect_multiregistration(
  bupaR::filter(activity_log, activity == "Medicine reconciliation"),
  level_of_aggregation = "case",
  timestamp = "complete",
  threshold_in_seconds = 61
)
## Selected level of aggregation: case
## Selected timestamp parameter value: complete
## *** OUTPUT ***
## Multi-registration is detected for 270086 of the 304369 cases (88.74%) of the cases. These cases are:
##
## For the following rows in the activity log, multi-registration is detected:
## # Log of 2902153 events consisting of:
## 270086 cases
## 2902153 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 2,902,153 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 2 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 3 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 4 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 5 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 6 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 7 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 8 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 9 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## 10 30000012 11714491 21562392 Medicine reconci… <NA> <NA> <NA>
## # ℹ 2,902,143 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect Multiregistration for activity
Medicine dispensations
# Look for "Medicine dispensations" rows of the same case whose `complete`
# timestamps fall within the 61-second threshold.
daqapo::detect_multiregistration(
  bupaR::filter(activity_log, activity == "Medicine dispensations"),
  level_of_aggregation = "case",
  timestamp = "complete",
  threshold_in_seconds = 61
)
## Selected level of aggregation: case
## Selected timestamp parameter value: complete
## *** OUTPUT ***
## Multi-registration is detected for 210732 of the 295998 cases (71.19%) of the cases. These cases are:
##
## For the following rows in the activity log, multi-registration is detected:
## # Log of 1076495 events consisting of:
## 210732 cases
## 1076495 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 1,076,495 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000012 11714491 21562392 Medicine dispens… <NA> <NA> <NA>
## 2 30000012 11714491 21562392 Medicine dispens… <NA> <NA> <NA>
## 3 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 4 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 5 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 6 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 7 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 8 30000017 14230614 NA Medicine dispens… <NA> <NA> <NA>
## 9 30000038 13821532 26255538 Medicine dispens… <NA> <NA> <NA>
## 10 30000038 13821532 26255538 Medicine dispens… <NA> <NA> <NA>
## # ℹ 1,076,485 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect Multiregistration for activity
Vital sign check
# Look for "Vital sign check" rows of the same case whose `complete`
# timestamps fall within the 61-second threshold.
# `filter()` is namespace-qualified like in the other checks so the call does
# not depend on which attached package currently masks `filter`.
activity_log %>%
  bupaR::filter(activity == "Vital sign check") %>%
  daqapo::detect_multiregistration(
    level_of_aggregation = "case",
    timestamp = "complete",
    threshold_in_seconds = 61
  )
## Selected level of aggregation: case
## Selected timestamp parameter value: complete
## *** OUTPUT ***
## Multi-registration is detected for 8666 of the 398828 cases (2.17%) of the cases. These cases are:
##
## For the following rows in the activity log, multi-registration is detected:
## # Log of 20078 events consisting of:
## 29 traces
## 8666 cases
## 20078 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 20,078 × 35
## stay_id subject_id hadm_id activity gender race arrival_transport
## <int> <int> <int> <chr> <chr> <chr> <chr>
## 1 30000252 18684072 28532292 Vital sign check <NA> <NA> <NA>
## 2 30000252 18684072 28532292 Vital sign check <NA> <NA> <NA>
## 3 30001460 16680046 21068480 Vital sign check <NA> <NA> <NA>
## 4 30001460 16680046 21068480 Vital sign check <NA> <NA> <NA>
## 5 30001802 15750321 25185357 Vital sign check <NA> <NA> <NA>
## 6 30001802 15750321 25185357 Vital sign check <NA> <NA> <NA>
## 7 30002186 17728787 26340932 Vital sign check <NA> <NA> <NA>
## 8 30002186 17728787 26340932 Vital sign check <NA> <NA> <NA>
## 9 30007594 10554696 24910876 Vital sign check <NA> <NA> <NA>
## 10 30007594 10554696 24910876 Vital sign check <NA> <NA> <NA>
## # ℹ 20,068 more rows
## # ℹ 28 more variables: disposition <chr>, seq_num <int>, icd_code <chr>,
## # icd_version <int>, icd_title <chr>, temperature <dbl>, heartrate <dbl>,
## # resprate <dbl>, o2sat <dbl>, sbp <dbl>, dbp <dbl>, pain <chr>,
## # acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>, name <chr>, gsn <int>,
## # ndc <dbl>, etc_rn <int>, etccode <int>, etcdescription <chr>, med_rn <int>,
## # gsn_rn <int>, resource_id <lgl>, activity_instance_id <dbl>, …
Detect invalid value range of acuity
# Check that `acuity` lies between 1 and 5 on the "Triage in the ED" rows.
daqapo::detect_value_range_violations(
  bupaR::filter(activity_log, activity == "Triage in the ED"),
  acuity = daqapo::domain_numeric(from = 1, to = 5)
)
## $acuity
## $type
## [1] "numeric"
##
## $from
## [1] 1
##
## $to
## [1] 5
##
## attr(,"class")
## [1] "value_range" "list"
## *** OUTPUT ***
## The domain range for column acuity is checked.
## Values allowed between 1 and 5
## The values fall within the specified domain range for 418052 (98.36%) of the rows in the activity log and outside the domain range for 6976 (1.64%) of these rows.
##
## The following rows fall outside the specified domain range for indicated column:
## # Log of 6976 events consisting of:
## 1 trace
## 6976 cases
## 6976 instances of 1 activity
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 6,976 × 36
## column_checked stay_id subject_id hadm_id activity gender race
## <chr> <int> <int> <int> <chr> <chr> <chr>
## 1 acuity 30001785 16061348 28146972 Triage in the ED <NA> <NA>
## 2 acuity 30003428 11889374 25084216 Triage in the ED <NA> <NA>
## 3 acuity 30003505 11229262 NA Triage in the ED <NA> <NA>
## 4 acuity 30003941 14334804 NA Triage in the ED <NA> <NA>
## 5 acuity 30004017 13419676 29317041 Triage in the ED <NA> <NA>
## 6 acuity 30004518 17237928 26689098 Triage in the ED <NA> <NA>
## 7 acuity 30006274 16169853 29415170 Triage in the ED <NA> <NA>
## 8 acuity 30007594 10554696 24910876 Triage in the ED <NA> <NA>
## 9 acuity 30008125 18064328 22704256 Triage in the ED <NA> <NA>
## 10 acuity 30011041 12155635 27459698 Triage in the ED <NA> <NA>
## # ℹ 6,966 more rows
## # ℹ 29 more variables: arrival_transport <chr>, disposition <chr>,
## # seq_num <int>, icd_code <chr>, icd_version <int>, icd_title <chr>,
## # temperature <dbl>, heartrate <dbl>, resprate <dbl>, o2sat <dbl>, sbp <dbl>,
## # dbp <dbl>, pain <chr>, acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>,
## # name <chr>, gsn <int>, ndc <dbl>, etc_rn <int>, etccode <int>,
## # etcdescription <chr>, med_rn <int>, gsn_rn <int>, resource_id <lgl>, …
Detect invalid value range of pain
# `pain` is a character column, so a numeric copy (`pain_num`) is derived
# before checking it against the 0-10 range. Values that cannot be parsed as
# numbers become NA, which is what raises the coercion warning.
# NOTE(review): NA values appear to be reported as out of range - confirm.
activity_log %>%
  bupaR::filter(activity %in% c("Triage in the ED", "Vital sign check")) %>%
  bupaR::mutate(pain_num = as.numeric(pain)) %>%
  daqapo::detect_value_range_violations(
    pain_num = daqapo::domain_numeric(from = 0, to = 10)
  )
## Warning: There was 1 warning in `bupaR::mutate()`.
## ℹ In argument: `pain_num = as.numeric(pain)`.
## Caused by warning:
## ! NAs introduced by coercion
## $pain_num
## $type
## [1] "numeric"
##
## $from
## [1] 0
##
## $to
## [1] 10
##
## attr(,"class")
## [1] "value_range" "list"
## *** OUTPUT ***
## The domain range for column pain_num is checked.
## Values allowed between 0 and 10
## The values fall within the specified domain range for 1304018 (70.53%) of the rows in the activity log and outside the domain range for 544744 (29.47%) of these rows.
##
## The following rows fall outside the specified domain range for indicated column:
## # Log of 544744 events consisting of:
## 235810 cases
## 544744 instances of 2 activities
## 1 resource
## Events occurred from NA until NA
##
## # Variables were mapped as follows:
## Case identifier: stay_id
## Activity identifier: activity
## Resource identifier: resource_id
## Timestamps: start, complete
##
## # A tibble: 544,744 × 37
## column_checked stay_id subject_id hadm_id activity gender race
## <chr> <int> <int> <int> <chr> <chr> <chr>
## 1 pain_num 30000039 13340997 23100190 Vital sign check <NA> <NA>
## 2 pain_num 30000039 13340997 23100190 Vital sign check <NA> <NA>
## 3 pain_num 30000039 13340997 23100190 Vital sign check <NA> <NA>
## 4 pain_num 30000112 13333760 NA Vital sign check <NA> <NA>
## 5 pain_num 30000202 15346940 NA Vital sign check <NA> <NA>
## 6 pain_num 30000202 15346940 NA Vital sign check <NA> <NA>
## 7 pain_num 30000202 15346940 NA Vital sign check <NA> <NA>
## 8 pain_num 30000204 11615015 25540031 Vital sign check <NA> <NA>
## 9 pain_num 30000204 11615015 25540031 Vital sign check <NA> <NA>
## 10 pain_num 30000204 11615015 25540031 Vital sign check <NA> <NA>
## # ℹ 544,734 more rows
## # ℹ 30 more variables: arrival_transport <chr>, disposition <chr>,
## # seq_num <int>, icd_code <chr>, icd_version <int>, icd_title <chr>,
## # temperature <dbl>, heartrate <dbl>, resprate <dbl>, o2sat <dbl>, sbp <dbl>,
## # dbp <dbl>, pain <chr>, acuity <dbl>, chiefcomplaint <chr>, rhythm <chr>,
## # name <chr>, gsn <int>, ndc <dbl>, etc_rn <int>, etccode <int>,
## # etcdescription <chr>, med_rn <int>, gsn_rn <int>, resource_id <lgl>, …